import matplotlib.pyplot as plt
import math
import numpy as np
import pandas as pd
import os
import csv
from datetime import datetime

class DiscountedStockIndex:
    def __init__(self, sDiscGOPFileName):     
        self.sFileName = sDiscGOPFileName
        # read in data
        print("Started reading data")
        f2 = open(sDiscGOPFileName, "r")
        print("We have opened the file "+sDiscGOPFileName)
        self.datelist = []
        self.valuelist = []
        self.timelist = []
        self.qvsqrtvaluelist = []

        fTime = None
        fPrevValue = None
        for line in f2:
            x = line.split(',')
            # extract the date and value
            try:        
                datetime_object = datetime.strptime(x[0], '%d/%m/%Y')
            except:
                datetime_object = None
            try:
                fValue = float(x[1])
            except:
                fValue = None
            if (datetime_object is not None) and (fValue is not None):
                # convert date to a time in years
                yyyy = datetime_object.year
                mm = datetime_object.month
                dd = datetime_object.day
                if (fTime is None):
                    fTime = yyyy
                    fQV = 0
                else:
                    firstday_datetime_object = datetime(yyyy,1,1)
                    lastday_datetime_object = datetime(yyyy+1,1,1)
                    delta = datetime_object - firstday_datetime_object
                    DIY = lastday_datetime_object - firstday_datetime_object
                    fTime = yyyy + delta.days/DIY.days
                    diffsqrtvalue = math.sqrt(fValue)-math.sqrt(fPrevValue)
                    fQV = fQV + diffsqrtvalue*diffsqrtvalue
                self.datelist.append(datetime_object)
                self.valuelist.append(fValue)
                self.timelist.append(fTime)
                self.qvsqrtvaluelist.append(fQV)
                fPrevValue = fValue

    def __str__(self):
        return self.sFileName
    def RsquFromTau0(self, tau0,j):
        # we compute the R-squared coefficient based on the regression of:
        # tau(t) = log {[sqrt Sbar](t) + exp(tau0)}
        # on t for t = t0, t1, ..., tj
        #
        # firstly compute the means
        sum_t = 0.0
        sum_tau = 0.0
        for i in range(j+1):
            t = self.timelist[i]
            qv = self.qvsqrtvaluelist[i]
            tau = math.log(qv + math.exp(tau0))
            sum_t += t
            sum_tau += tau
        mean_t = sum_t/(j+1)
        mean_tau = sum_tau/(j+1)
        # secondly compute the sum of products of deviations
        S_t_tau = 0.0
        S_t_t = 0.0
        S_tau_tau = 0.0
        for i in range(j+1):
            t = self.timelist[i]
            qv = self.qvsqrtvaluelist[i]
            tau = math.log(qv + math.exp(tau0))
            S_t_t += (t-mean_t)**2
            S_t_tau += (t-mean_t)*(tau-mean_tau)
            S_tau_tau += (tau-mean_tau)**2
        Rsqu = S_t_tau**2/(S_t_t*S_tau_tau)
        return Rsqu
    def GetOptimalTau0V2(self,n):
        # we compute the optimal value of tau0 which maximises the R-squared coefficient
        # based on the regression of tau(t) on t for t=t0,t1,...,tn
        # initial estimate of optimal tau0 is 0
        # uses the Newton-Raphson method
        tau0 = -3.0
        dtau0 = 0.001
        eps = 0.0000001
        f_tau0 = self.RsquFromTau0(tau0,n)
        f_tau0_minus_dtau = self.RsquFromTau0(tau0-dtau0,n)
        f_tau0_plus_dtau = self.RsquFromTau0(tau0+dtau0,n)
        df_dtau0 = (f_tau0_plus_dtau - f_tau0_minus_dtau)/(2.0*dtau0)
        nIterations = 0
        while abs(df_dtau0)>eps and nIterations<20:
            nIterations = nIterations + 1
            d2f_dtau02 = (f_tau0_plus_dtau + f_tau0_minus_dtau - 2.0*f_tau0)/(dtau0*dtau0)
            print("tau0=",tau0,", f_tau0=",f_tau0,"df_dtau0=",df_dtau0)
            tau0 = tau0 - df_dtau0/d2f_dtau02
            f_tau0 = self.RsquFromTau0(tau0,n)
            f_tau0_minus_dtau = self.RsquFromTau0(tau0-dtau0,n)
            f_tau0_plus_dtau = self.RsquFromTau0(tau0+dtau0,n)
            df_dtau0 = (f_tau0_plus_dtau - f_tau0_minus_dtau)/(2*dtau0)
        print("nIterations = ",nIterations)
        return tau0
    def GetOptimalTau0(self,j):
        # we compute the optimal value of tau0 which maximises the R-squared coefficient
        # based on the regression of tau(t) on t for t=t0,t1,...,tj
        # initial estimate of optimal tau0 is 0
        dtau = 1.0
        tau0 = 0.0
        eps = 0.00000001
        f_tau0 = self.RsquFromTau0(tau0,j)
        bFoundMax = False
        while(not bFoundMax):
            tau2 = tau0 + dtau
            tau1 = tau0 - dtau            
            f_tau1 = self.RsquFromTau0(tau1,j)
            f_tau2 = self.RsquFromTau0(tau2,j)
            if(f_tau1>f_tau0):
                if(f_tau2>f_tau1):
                    tau0 = tau2
                    f_tau0 = f_tau2
                else:
                    tau0 = tau1
                    f_tau0 = f_tau1
            elif(f_tau2>f_tau0):
                    tau0 = tau2
                    f_tau0 = f_tau2
            else:
                dtau = dtau*0.5
                bFoundMax = dtau<eps
        return tau0
    def ProfitFromHedging(self,i,j,tau0):
        # we compute the profit from hedging a ZCB commencing at t0 = t(i) and maturing at T = t(j)
        # firstly, compute the market activity tau commencing with tau0
        # i in {0,1,...,n-2} and j in {i+1,...,n-1} where n = len(self.timelist)
        # note that tau0 is a pre-computed quantity based on the regression of
        # tau(t)=log { [sqrt(Sbar)] + exp(tau0) } on t for t=t(0),...,t(i)
        taulist = []
        Rsqu = None
        Slope = None
        Intercept = None
        n = len(self.timelist)
        # compute list of tau values
        for it in range(n):
            t = self.timelist[it]
            qv = self.qvsqrtvaluelist[it]
            tau = math.log(qv + math.exp(tau0))
            taulist.append(tau)
        # calculate the R-squ, slope and intercept for computing taubar values
        # we regress tau(t) on t for t=t(0),...,t(i)
        #
        # firstly compute the means
        sum_t = 0.0
        sum_tau = 0.0
        for it in range(i+1):
            t = self.timelist[it]
            tau = taulist[it]
            sum_t += t
            sum_tau += tau
        mean_t = sum_t/(i+1)
        mean_tau = sum_tau/(i+1)        
        # secondly compute the sum of products of deviatinos        
        S_t_tau = 0.0
        S_t_t = 0.0
        S_tau_tau = 0.0
        for it in range(i+1):
            t = self.timelist[it]
            tau = taulist[it]
            S_t_tau += (t-mean_t)*(tau-mean_tau)
            S_t_t += (t-mean_t)*(t-mean_t)
            S_tau_tau += (tau-mean_tau)*(tau-mean_tau)
        cov_t_tau = S_t_tau/(i+1)
        var_t = S_t_t/(i+1)
        var_tau = S_tau_tau/(i+1)
        Rsqu = (cov_t_tau**2)/(var_t*var_tau) if var_t>0 else 0.0
        Slope = cov_t_tau/var_t if var_t>0 else 0.0
        Intercept = mean_tau-mean_t*Slope
        taubarlist = [(Intercept + Slope*t) for t in self.timelist]
        # now compute the hedge quantities in respect of a T-maturity ZCB
        Exp_t_T = []
        fPrevExp_t_T = None
        for t in range(i,j+1):
            fExp_t_T = 0
            if(fPrevExp_t_T==0):
                fExp_t_T = 0
            elif(taubarlist[j]>taulist[t]):
                #print("taubarlist[j]",taubarlist[j],"taulist[t]",taulist[t])
                #print("self.valuelist[t]",self.valuelist[t],"taubarlist[j]",taubarlist[j],"taulist[t]",taulist[t])
                fExp_t_T = math.exp(-0.5*self.valuelist[t]/(math.exp(taubarlist[j])-math.exp(taulist[t])))
            Exp_t_T.append(fExp_t_T)
            fPrevExp_t_T = fExp_t_T
        P_t_T = [(1.0-Exp_t_T[t]) for t in range(j+1-i)]
        pi_t = [(math.log(1.0-P_t_T[0])*(1.0-1.0/P_t_T[0]) if P_t_T[0]<1 else 0)]
        Z_t_T=[P_t_T[0]]        
        for it in range(1,j+1-i):            
            Z_t_T.append(Z_t_T[it-1]*(1+pi_t[it-1]*(self.valuelist[it+i]/self.valuelist[it+i-1]-1)))
            pi_t.append((math.log(1.0-Z_t_T[it])*(1.0-1.0/Z_t_T[it]) if Z_t_T[it]<1 else 0))
        C_t = [(-Z_t_T[t]+P_t_T[t]) for t in range(j+1-i)]
        #FLVR_t = [(Z_t_T[t]/P_t_T[0] - 1) for t in range(j+1-i)]
        FLVR_t = [(Z_t_T[t]-P_t_T[0] ) for t in range(j+1-i)]
        #profitatmaturity = Z_t_T[j-i]/P_t_T[0] - 1
        profitatmaturity = Z_t_T[j-i]-P_t_T[0]
        maxabshedgeerror = max([abs(x) for x in C_t])
        if(False):
            # print out for testing
            testdata = {"Date": self.datelist[i:j+1],
                    "Sbar_t": self.valuelist[i:j+1],
                    "Time t": self.timelist[i:j+1],
                    "QV sqrt(Sbar)": self.qvsqrtvaluelist[i:j+1],
                    "tau_t": taulist[i:j+1],
                    "taubar_t": taubarlist[i:j+1],
                    "P_t_T": P_t_T,
                    "pi_t": pi_t,
                    "Z_t_T": Z_t_T,
                    "Hedge Error": C_t,
                    "Potential FLVR": FLVR_t}
            testdf = pd.DataFrame(testdata)
            testdf.to_csv("TestOutput.csv")
        return [profitatmaturity,maxabshedgeerror,Rsqu]

# Main program
datetime_start = datetime.now()
print("Started at ",datetime_start)

# Firstly, define the date file name.
# Please remove the comment symbol # for the appropriate csv file used for the backtest.
#
#sDiscGOPFileName = "DiscSP500.csv"
#sDiscGOPFileName = "DiscTorontoSX.csv"
#sDiscGOPFileName = "DiscCAC40.csv"
#sDiscGOPFileName = "DiscDAX40.csv"
#sDiscGOPFileName = "DiscMIB40.csv"
#sDiscGOPFileName = "DiscFTSE100.csv"
#sDiscGOPFileName = "DiscNikkei225.csv"
#sDiscGOPFileName = "DiscMXWO.csv"
sDiscGOPFileName = "DiscountedStockIndex.csv"

DiscGOP_Data = DiscountedStockIndex(sDiscGOPFileName)


month_start_indices = []
n = len(DiscGOP_Data.datelist)
mth_prev = DiscGOP_Data.datelist[0].month
for j in range(1,n):
    mth_curr = DiscGOP_Data.datelist[j].month
    if(abs(mth_curr-mth_prev)>0.1):
        month_start_indices.append(j)
    mth_prev = mth_curr


f = open('FLVR_Backtests_'+sDiscGOPFileName,'w')
f.write("i0,j0,i,j,t(i),t(j),tau0,Rsqu,FLVR(i j),max |C(i j)|\n")


minYearsToMaturity = 15
maxYearsToMaturity = 17

imin = 120 # corresponds to 10 years and 1 month
imax = len(month_start_indices)-1-minYearsToMaturity*12
jmax = len(month_start_indices)-1

for i0 in range(imin,imax+1):
    for j0 in range(i0+minYearsToMaturity*12,min(i0+maxYearsToMaturity*12,jmax)+1):
#for i0 in range(149,150):
#    for j0 in range(329,330):
        print("Doing: (",i0,j0,"). Finishes: (",imax,jmax,")")
        tau0 = DiscGOP_Data.GetOptimalTau0(i0)
        i = month_start_indices[i0]
        j = month_start_indices[j0]
        ti = DiscGOP_Data.datelist[i]
        tj = DiscGOP_Data.datelist[j]
        #print("(ti,tj) = (",ti,tj,")")
        [profit,maxabshedgeerr,Rsqu]=DiscGOP_Data.ProfitFromHedging(i,j,tau0)
        line = "{},{},{},{},{},{},{},{},{},{}\n".format(i0,j0,i,j,ti,tj,tau0,Rsqu,profit,maxabshedgeerr)
        f.write(line)
f.close()
print("The job is done and the file has been written.")

datetime_end = datetime.now()
print("Ended at ",datetime_end," and time elapsed = ",datetime_end-datetime_start)